# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from suzhou.rules import *
from suzhou.utils.cookie_builder import build_cookie
from zc_core.spiders.base import BaseSpider


class CertSpider(BaseSpider):
    """Spider that requests the brand-authorize endpoint for SKUs in the pool.

    One request is issued per SKU returned by ``SkuPoolDao`` for the query
    ``{'certs': {'$exists': False}}`` (presumably "SKUs that have no ``certs``
    field yet" -- confirm against the DAO). Each response is handed to
    ``parse_certs`` and any truthy result is yielded as an item.
    """

    name = 'cert'
    custom_settings = {
        'CONCURRENT_REQUESTS': 4,
        # 'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
        'CONCURRENT_REQUESTS_PER_IP': 4,
    }
    # Frequently used link: URL template, formatted with the SKU id.
    item_url = 'http://www.zfcgwssc.suzhou.gov.cn/commodities/get_brand_authorize?id={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and create the batch record.

        Args:
            batchNo: Batch number forwarded to ``BaseSpider.__init__``;
                presumably used there to derive ``self.batch_no`` -- confirm.
            *args: Extra positional arguments forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        super(CertSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)

    def start_requests(self):
        """Yield one request per SKU in the pool that matches the query.

        The pool list is shuffled in place so SKUs are requested in random
        order.

        Yields:
            Request: A request for ``item_url`` formatted with the SKU id,
            carrying ``reqType``, ``batchNo`` and ``skuId`` in its meta.
        """
        pool_list = SkuPoolDao().get_sku_pool_list(query={'certs': {'$exists': False}})
        self.logger.info('全量: %s', len(pool_list))
        random.shuffle(pool_list)

        # The headers are identical for every request, so build the mapping
        # once; Scrapy copies it into each Request's own Headers object.
        headers = {
            'Host': 'www.zfcgwssc.suzhou.gov.cn',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # The session cookie is currently not sent. Previously:
            # 'Cookie': f'area_id=320505; _suzhou_session={build_cookie()}',
        }

        for sku in pool_list:
            sku_id = sku.get('_id')
            # The meta dict is created per request because it carries the
            # SKU id of this particular request.
            yield Request(
                url=self.item_url.format(sku_id),
                meta={
                    'reqType': 'cert',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                },
                headers=headers,
                callback=self.parse_cert,
                # error_back is not defined in this class; presumably it is
                # inherited from BaseSpider.
                errback=self.error_back,
            )

    def parse_cert(self, response):
        """Parse a brand-authorize response and yield the extracted item.

        Args:
            response: The response for a request issued by
                ``start_requests``.

        Yields:
            The object returned by ``parse_certs(response)`` when it is
            truthy; otherwise nothing is yielded and the SKU id from the
            request meta is logged.
        """
        meta = response.meta
        data = parse_certs(response)
        if data:
            self.logger.info('资质: [%s]', data.get('skuId'))
            yield data
        else:
            self.logger.info('无资质: %s', meta.get('skuId'))
